Contents

library(tidyverse)
library(magrittr)
library(ggbeeswarm)

1 Load data

1.1 Variance-stabilized transformed expression values for all genes in all individuals

#counts <- read.delim(paste0(dir, "Data/count_data.csv"), sep = ",")
counts <- read.delim(paste0(dir, "Data/vst_counts.csv"), sep = ",")
#mean_counts <- read.delim(paste0(dir, "Data/mean_counts.csv"), sep = ",")
head(counts)
##        ensembl_id     gene      names    counts Age age_group  sex
## 1 ENSG00000000003   TSPAN6 SRR7093810  8.717334  12      1-15 male
## 2 ENSG00000000419     DPM1 SRR7093810  9.451970  12      1-15 male
## 3 ENSG00000000457    SCYL3 SRR7093810  8.221156  12      1-15 male
## 4 ENSG00000000460 C1orf112 SRR7093810  7.599157  12      1-15 male
## 5 ENSG00000000971      CFH SRR7093810  8.094194  12      1-15 male
## 6 ENSG00000001036    FUCA2 SRR7093810 11.114842  12      1-15 male

1.2 TF groups from the Steiner trees

pcst_groups <- read.delim(paste0(dir, "Data/specific_TFs_targets.csv"), sep = ",")  %>%
  mutate(net = factor(net, levels = c('young_net', 'middle_net', 'old_net', 'shared')))
head(pcst_groups)
##      TF    net     target DE_gene
## 1 FOXO3 shared      NR0B2   False
## 2 FOXO3 shared CTB-52I2.4   False
## 3 FOXO3 shared        FUZ   False
## 4 FOXO3 shared     AKT1S1   False
## 5 FOXO3 shared    TBC1D17   False
## 6 FOXO3 shared      RDH13   False

2 PCST groups

plot_TF_expression <- function(TFs){
  TF_expr <- filter(counts, gene %in% TFs) %>%
    # if two ensembl ids correspond to the same TF take the mean count
    group_by(gene, names, Age, age_group) %>%
    summarize(counts = mean(counts)) 
  
  ggplot(TF_expr, aes(x = Age, y = counts, group = gene, color = gene)) +
    geom_point(color = 'grey') +
    theme_bw() + 
    facet_wrap(~gene, scales = "free_y") +
    theme(legend.position="none") + 
    theme(text = element_text(size = 13),
          strip.text.x = element_text(size = 16)) +
    xlab("Age") + 
    ylab("FPKM") +
    geom_smooth(color = "red")
}

plot_TF_mean_expression <- function(TFs){
  TF_expr <- filter(counts, gene %in% TFs) %>%
    # if two ensembl ids correspond to the same TF take the mean count
    group_by(gene, names, Age, age_group) %>%
    summarize(counts = mean(counts)) 
  
  ggplot(TF_expr, aes(x = age_group, y = counts, group = gene, color = gene)) +
    geom_point(color = 'grey', alpha = 0.4) +
    #geom_beeswarm(color = 'grey', cex = 3) +
    theme_bw() + 
    facet_wrap(~gene, scales = "free_y") +
    theme(legend.position="none") + 
    theme(text = element_text(size = 13),
          strip.text.x = element_text(size = 15),
          axis.text.x = element_text(angle = 90)) +
    xlab("Age") + 
    ylab("Normalized expression") +
    geom_line(stat = "summary", fun = "mean", colour = "brown", size = 1, aes(group = 1))
}

TF_variability <- function(TFs){
  TF_expr <- filter(counts, gene %in% TFs) %>%
    # if two ensembl ids correspond to the same TF take the mean count
    group_by(gene, names, Age, age_group) %>%
    summarize(counts = mean(counts)) %>%
    ungroup() %>%
    # get median expression per age group
    group_by(gene, age_group) %>%
    summarize(median = median(counts)) %>%
    ungroup() %>%
    # calculate variance per TF
    group_by(gene) %>%
    summarize(variance = var(median)) %>%
    # order TFs from highest to lowest variance
    arrange(desc(variance))
  return(TF_expr)
}

2.1 Shared TFs

TFs <- unique(filter(pcst_groups, net == "shared")$TF)
plot_TF_mean_expression(TFs) +facet_wrap(~gene, scales = "free_y", nrow = 3)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

TF_variability(TFs)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'gene'. You can override using the
## `.groups` argument.
## # A tibble: 20 × 2
##    gene   variance
##    <chr>     <dbl>
##  1 GATA2   0.812  
##  2 STAT1   0.490  
##  3 NOTCH1  0.272  
##  4 KLF3    0.122  
##  5 TCF7L2  0.0974 
##  6 MAZ     0.0642 
##  7 FOXO3   0.0640 
##  8 STAT3   0.0639 
##  9 AHR     0.0517 
## 10 SRC     0.0216 
## 11 POLR2A  0.0160 
## 12 FLI1    0.0114 
## 13 HOXA6   0.0110 
## 14 HIF1A   0.00774
## 15 HDAC1   0.00627
## 16 ATF1    0.00521
## 17 NCOR1   0.00401
## 18 GTF2B   0.00391
## 19 ZNF384  0.00307
## 20 NR2C2   0.00196

2.2 Old-specific TFs

TFs <- unique(filter(pcst_groups, net == "old_net")$TF)

plot_TF_mean_expression(TFs) +facet_wrap(~gene, scales = "free_y", nrow = 3)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

TF_variability(TFs)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'gene'. You can override using the
## `.groups` argument.
## # A tibble: 20 × 2
##    gene   variance
##    <chr>     <dbl>
##  1 E2F1    0.606  
##  2 ORC1    0.318  
##  3 NR3C1   0.0603 
##  4 ARNT    0.0437 
##  5 KDM4C   0.0379 
##  6 ELK1    0.0356 
##  7 CLOCK   0.0338 
##  8 GLIS1   0.0323 
##  9 CDK9    0.0285 
## 10 BMI1    0.0214 
## 11 TRIM24  0.0210 
## 12 THAP11  0.0166 
## 13 SMAD2   0.00719
## 14 KDM1A   0.00684
## 15 DDX5    0.00545
## 16 FOS     0.00522
## 17 TAF3    0.00456
## 18 ATRX    0.00412
## 19 BRD2    0.00367
## 20 MYH11   0.00174

2.3 Young-specific TFs

TFs <- unique(filter(pcst_groups, net == "young_net")$TF)
plot_TF_mean_expression(TFs)+facet_wrap(~gene, scales = "free_y", nrow = 3)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

TF_variability(TFs)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.
## `summarise()` has grouped output by 'gene'. You can override using the
## `.groups` argument.
## # A tibble: 20 × 2
##    gene    variance
##    <chr>      <dbl>
##  1 MYBL2    1.08   
##  2 CENPA    0.519  
##  3 NCOR2    0.511  
##  4 GLI2     0.201  
##  5 TEAD4    0.113  
##  6 HOXA13   0.112  
##  7 POU2F2   0.102  
##  8 MEIS1    0.0948 
##  9 NR5A2    0.0848 
## 10 ZKSCAN1  0.0815 
## 11 EP300    0.0621 
## 12 TFAP2C   0.0505 
## 13 IRF1     0.0383 
## 14 RELA     0.0334 
## 15 CTNNB1   0.0115 
## 16 CEBPB    0.00848
## 17 MAX      0.00439
## 18 SIN3A    0.00383
## 19 CDK7     0.00292
## 20 RNF2     0.00283

2.4 Middle network specific TFs

TFs <-  unique(filter(pcst_groups, net == "middle_net")$TF)
plot_TF_mean_expression(TFs) +facet_wrap(~gene, scales = "free_y", ncol = 7)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

2.5 Differentially expressed TFs

TF_targets <- read.delim(paste0(dir, "Data/tf-target-information.txt"))
p_0.05 <- read.delim(paste0(dir, "Data/DE_var_p_n_200.csv"), sep = ",")

DE_TFs <- filter(p_0.05, gene %in% TF_targets$TF) %>%
  filter(transition != "fc_61-85_86-96")
head(DE_TFs)
##     gene     transition abs_log2_fc
## 1    ERG  fc_1-15_16-26   1.8995106
## 2    FOS fc_27-60_61-85   2.6786344
## 3  FOXP2 fc_16-26_27-60   1.3404898
## 4 HOXA13  fc_1-15_16-26   1.9142726
## 5   MAFK fc_27-60_61-85   0.4445288
## 6  MECOM fc_27-60_61-85   0.9700591
dim(DE_TFs)
## [1] 17  3
plot_TF_mean_expression(unique(DE_TFs$gene))
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

2.6 MAZ, FLI1, ZNF384

TFs <-  c("MAZ", "FLI1", "ZNF384")
plot_TF_mean_expression(TFs) + facet_wrap(~gene, scales = "free_y", ncol = 3)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

3 Expression trajectories for DE genes

3.1 Age group 1 vs 2

genes <-  c("PRXL2B", "MOAP1", "MYO19", "GMPR2", "TRIM39")
plot_TF_mean_expression(genes) + 
  facet_wrap(~gene, scales = "free_y", nrow = 1)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

3.2 Age group 2 vs 3

genes <-  c("SKA1", "FLOT1", "PRXL2B", "RRN3", "MOAP1")
plot_TF_mean_expression(genes) + 
  facet_wrap(~gene, scales = "free_y", nrow = 1)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

3.3 Age group 3 vs 4

genes <-  c("MYO19", "RING1", "PRXL2B", "FOXS1", "LEP")
plot_TF_mean_expression(genes) + 
  facet_wrap(~gene, scales = "free_y", nrow = 1)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.

3.4 Age group 4 vs 5

genes <-  c("MYO19", "FLOT1", "RING1", "TNXB", "PRXL2B")
plot_TF_mean_expression(genes) + 
  facet_wrap(~gene, scales = "free_y", nrow = 1)
## `summarise()` has grouped output by 'gene', 'names', 'Age'. You can override
## using the `.groups` argument.